Skip to content

Conversation

@shubhe25p
Copy link
Contributor

@shubhe25p shubhe25p commented Nov 29, 2025

Add %lc support to libc printf by utilizing wcrtomb internal function, also updated parser logic and added relevant end to end snprintf tests. Resolves #166598

@llvmbot llvmbot added the libc label Nov 29, 2025
@llvmbot
Copy link
Member

llvmbot commented Nov 29, 2025

@llvm/pr-subscribers-libc

Author: Shubh Pachchigar (shubhe25p)

Changes

Full diff: https://github.com/llvm/llvm-project/pull/169983.diff

4 Files Affected:

  • (modified) libc/src/stdio/printf_core/CMakeLists.txt (+3)
  • (modified) libc/src/stdio/printf_core/char_converter.h (+24-4)
  • (modified) libc/test/src/stdio/printf_core/CMakeLists.txt (+1)
  • (modified) libc/test/src/stdio/printf_core/converter_test.cpp (+54-1)
diff --git a/libc/src/stdio/printf_core/CMakeLists.txt b/libc/src/stdio/printf_core/CMakeLists.txt
index 624129b2b36e7..f35a65de1f110 100644
--- a/libc/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/src/stdio/printf_core/CMakeLists.txt
@@ -111,6 +111,7 @@ add_header_library(
     .printf_config
     .writer
     libc.include.inttypes
+    libc.hdr.types.wchar_t
     libc.src.__support.big_int
     libc.src.__support.common
     libc.src.__support.CPP.limits
@@ -123,6 +124,8 @@ add_header_library(
     libc.src.__support.integer_to_string
     libc.src.__support.libc_assert
     libc.src.__support.uint128
+    libc.src.__support.wchar.mbstate
+    libc.src.__support.wchar.wcrtomb
     libc.src.__support.StringUtil.error_to_string
     libc.src.string.memory_utils.inline_memcpy
 )
diff --git a/libc/src/stdio/printf_core/char_converter.h b/libc/src/stdio/printf_core/char_converter.h
index fd2eb2553887a..31dd34fe7a797 100644
--- a/libc/src/stdio/printf_core/char_converter.h
+++ b/libc/src/stdio/printf_core/char_converter.h
@@ -1,4 +1,4 @@
-//===-- String Converter for printf -----------------------------*- C++ -*-===//
+//===-- Character Converter for printf -----------------------------*- C++ -*-===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -9,7 +9,10 @@
 #ifndef LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 #define LLVM_LIBC_SRC_STDIO_PRINTF_CORE_CHAR_CONVERTER_H
 
+#include "hdr/types/wchar_t.h"
 #include "src/__support/macros/config.h"
+#include "src/__support/wchar/mbstate.h"
+#include "src/__support/wchar/wcrtomb.h"
 #include "src/stdio/printf_core/converter_utils.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
@@ -20,8 +23,11 @@ namespace printf_core {
 template <WriteMode write_mode>
 LIBC_INLINE int convert_char(Writer<write_mode> *writer,
                              const FormatSection &to_conv) {
-  char c = static_cast<char>(to_conv.conv_val_raw);
-
+  char c;
+  wchar_t wc;
+  char mb_str[MB_LEN_MAX];
+  internal::mbstate internal_mbstate = {0};
+  int ret = 0;
   constexpr int STRING_LEN = 1;
 
   size_t padding_spaces =
@@ -33,7 +39,21 @@ LIBC_INLINE int convert_char(Writer<write_mode> *writer,
     RET_IF_RESULT_NEGATIVE(writer->write(' ', padding_spaces));
   }
 
-  RET_IF_RESULT_NEGATIVE(writer->write(c));
+  if (to_conv.length_modifier == LengthModifier::l) {
+    wc = static_cast<wchar_t>(to_conv.conv_val_raw);
+    ret = internal::wcrtomb(mb_str, wc, &internal_mbstate);
+    if (ret <= 0) {
+      return -1;
+    }
+
+    for (int i = 0; i < ret; i++) {
+      RET_IF_RESULT_NEGATIVE(writer->write(mb_str[i]));
+    }
+
+  } else {
+    c = static_cast<char>(to_conv.conv_val_raw);
+    RET_IF_RESULT_NEGATIVE(writer->write(c));
+  }
 
   // If the padding is on the right side, write the spaces last.
   if (padding_spaces > 0 &&
diff --git a/libc/test/src/stdio/printf_core/CMakeLists.txt b/libc/test/src/stdio/printf_core/CMakeLists.txt
index ff7ebbc4f5fd0..a4c919420777d 100644
--- a/libc/test/src/stdio/printf_core/CMakeLists.txt
+++ b/libc/test/src/stdio/printf_core/CMakeLists.txt
@@ -35,4 +35,5 @@ add_libc_unittest(
     libc.src.stdio.printf_core.converter
     libc.src.stdio.printf_core.writer
     libc.src.stdio.printf_core.core_structs
+    libc.hdr.types.wchar_t
 )
diff --git a/libc/test/src/stdio/printf_core/converter_test.cpp b/libc/test/src/stdio/printf_core/converter_test.cpp
index 2dae2a22c864c..026e36747d4df 100644
--- a/libc/test/src/stdio/printf_core/converter_test.cpp
+++ b/libc/test/src/stdio/printf_core/converter_test.cpp
@@ -9,7 +9,7 @@
 #include "src/stdio/printf_core/converter.h"
 #include "src/stdio/printf_core/core_structs.h"
 #include "src/stdio/printf_core/writer.h"
-
+#include "hdr/types/wchar_t.h"
 #include "test/UnitTest/Test.h"
 
 class LlvmLibcPrintfConverterTest : public LIBC_NAMESPACE::testing::Test {
@@ -255,3 +255,56 @@ TEST_F(LlvmLibcPrintfConverterTest, OctConversion) {
   ASSERT_STREQ(str, "1234");
   ASSERT_EQ(writer.get_chars_written(), size_t{4});
 }
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversion) {
+
+  LIBC_NAMESPACE::printf_core::FormatSection section;
+  section.has_conv = true;
+  section.raw_string = "%c";
+  section.conv_name = 'c';
+  section.length_modifier = LIBC_NAMESPACE::printf_core::LengthModifier::l;
+  section.conv_val_raw = static_cast<wchar_t>(L'S');
+
+  LIBC_NAMESPACE::printf_core::convert(&writer, section);
+
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "S");
+  ASSERT_EQ(writer.get_chars_written(), size_t{1});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionLeftJustified) {
+  LIBC_NAMESPACE::printf_core::FormatSection left_justified_conv;
+  left_justified_conv.has_conv = true;
+  left_justified_conv.raw_string = "%-4c";
+  left_justified_conv.conv_name = 'c';
+  left_justified_conv.length_modifier =
+      LIBC_NAMESPACE::printf_core::LengthModifier::l;
+  left_justified_conv.flags =
+      LIBC_NAMESPACE::printf_core::FormatFlags::LEFT_JUSTIFIED;
+  left_justified_conv.min_width = 4;
+  left_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+  LIBC_NAMESPACE::printf_core::convert(&writer, left_justified_conv);
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "S   ");
+  ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}
+
+TEST_F(LlvmLibcPrintfConverterTest, WideCharConversionRightJustified) {
+  LIBC_NAMESPACE::printf_core::FormatSection right_justified_conv;
+  right_justified_conv.has_conv = true;
+  right_justified_conv.raw_string = "%4c";
+  right_justified_conv.conv_name = 'c';
+  right_justified_conv.length_modifier =
+      LIBC_NAMESPACE::printf_core::LengthModifier::l;
+  right_justified_conv.min_width = 4;
+  right_justified_conv.conv_val_raw = static_cast<wchar_t>(L'S');
+
+  LIBC_NAMESPACE::printf_core::convert(&writer, right_justified_conv);
+  wb.buff[wb.buff_cur] = '\0';
+
+  ASSERT_STREQ(str, "   S");
+  ASSERT_EQ(writer.get_chars_written(), size_t{4});
+}

@github-actions
Copy link

github-actions bot commented Nov 29, 2025

✅ With the latest revision this PR passed the C/C++ code formatter.

@shubhe25p
Copy link
Contributor Author

shubhe25p commented Nov 29, 2025

This PR has spun up three new interesting directions, each of which seems feasible with some guidance. I am not sure which one can be pulled into this PR and if it is even an issue. Let me know your thoughts @michaelrj-google:

  1. wchar is a distinct integral type in C++ standard but BigInt guard clause will throw an error as wchar is not recognized. Here I have a workaround where I cast to unsigned int. So the work will involve adding wchar to BigInt.
  2. Adding wchar support on windows, currently a static assert in wcrtomb causes a build failure on windows platforms and work will involve adding UTF-16 support to wcrtomb
  3. %ls support in printf, which is a natural extension to this work and would involve a looping over a sequence of wchar


RET_IF_RESULT_NEGATIVE(writer->write(c));
if (to_conv.length_modifier == LengthModifier::l) {
wc = static_cast<wchar_t>(static_cast<unsigned int>(to_conv.conv_val_raw));
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The argument type would be wint_t per standard, so you might need to handle WEOF and then static_cast.

You mentioned BigInt problems in the review thread, could you elaborate how are these relevant?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks this makes sense, what I was referring to was that naively casting from conv_val_raw (BigInt) to wchar_t leads to an error in BigInt to(), as the casting has a guard clause that essentially checks:

struct BigInt {
...
template <typename T>
  LIBC_INLINE constexpr cpp::enable_if_t<
      cpp::is_integral_v<T> && !cpp::is_same_v<T, bool>, T>
  to() const { 
       ... }

is_integral_v will return false if the type is not one of:

char, signed char, unsigned char, short, unsigned short, int,
      unsigned int, long, unsigned long, long long, unsigned long long, bool

That is the reason why I cast to unsigned int first. Let me know your thoughts

@vonosmas
Copy link
Contributor

vonosmas commented Dec 1, 2025

Thank you for starting to work on this!

This PR has spun up three new interesting directions, each of which seems feasible with some guidance. I am not sure which one can be pulled into this PR and if it is even an issue. Let me know your thoughts @michaelrj-google:

  1. wchar is a distinct integral type in C++ standard but BigInt guard clause will throw an error as wchar is not recognized. Here I have a workaround where I cast to unsigned int. So the work will involve adding wchar to BigInt.

See comment in CL - I think you need to roundtrip the input arguments through wint_t.

  1. Adding wchar support on windows, currently a static assert in wcrtomb causes a build failure on windows platforms and work will involve adding UTF-16 support to wcrtomb

Could you figure out why is our UTF-32-only wcrtomb gets pulled in by this change? I.e. is there a particular function that is enabled on Windows and depends on the printf functionality?

  1. %ls support in printf, which is a natural extension to this work and would involve a looping over a sequence of wchar

That sounds like a natural follow-up!

@shubhe25p shubhe25p marked this pull request as draft December 3, 2025 06:35
@shubhe25p shubhe25p force-pushed the br_printf_lc_support branch from c2a7797 to 7a33e23 Compare December 3, 2025 08:45
@shubhe25p shubhe25p closed this Dec 3, 2025
@shubhe25p shubhe25p reopened this Dec 3, 2025
@github-actions
Copy link

github-actions bot commented Dec 3, 2025

🐧 Linux x64 Test Results

✅ The build succeeded and no tests ran. This is expected in some build configurations.

@shubhe25p
Copy link
Contributor Author

shubhe25p commented Dec 6, 2025

Could you figure out why is our UTF-32-only wcrtomb gets pulled in by this change? I.e. is there a particular function that is enabled on Windows and depends on the printf functionality?

Thank you so much for the feedback, it is really helpful @vonosmas. I am not able to find this specific function, can you please help? I can add a macro to guard the wcrtomb in char_converter.h as suggested by @michaelrj-google but I am not sure.

@shubhe25p shubhe25p force-pushed the br_printf_lc_support branch 3 times, most recently from 97751a8 to cbf857a Compare December 7, 2025 22:40
Add support for %lc in printf by calling internal
wcrtomb function and relevant end-to-end snprintf
test. Additionally, printf parser can also
recognize length modifier. Added a flag to disable
wchar support on windows platform.
@shubhe25p shubhe25p force-pushed the br_printf_lc_support branch from cbf857a to 552b263 Compare December 7, 2025 22:55
@shubhe25p shubhe25p marked this pull request as ready for review December 7, 2025 23:00
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

Projects

None yet

Development

Successfully merging this pull request may close these issues.

[libc] Support %lc in printf

4 participants